{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "4f182601-e14e-4691-8e98-010f4e4395c2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-27T08:51:17.325773Z",
     "iopub.status.busy": "2022-04-27T08:51:17.325267Z",
     "iopub.status.idle": "2022-04-27T08:51:17.627173Z",
     "shell.execute_reply": "2022-04-27T08:51:17.626579Z",
     "shell.execute_reply.started": "2022-04-27T08:51:17.325705Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 导入包\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a362338-1dad-40e6-8545-354bc4e0581b",
   "metadata": {},
   "source": [
    "## 小结\n",
    "1. 算术运算和数据对齐\n",
    "    1. 先根据索引做数据对齐，再进行运算\n",
    "    2. 找不到相同的索引，就填充nan值\n",
    "2. 使用填充值的算术方法\n",
    "    1. 数据对齐时，以设定的填充值替代nan\n",
    "    2. 如果两边都不存在的索引位置，还是nan\n",
    "3. DataFrame和Series的混合运算\n",
    "    1. 类似NumPy不同维度数组的运算\n",
    "    2. 默认匹配DataFrame列索引，可手动指定匹配行索引"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c242b7f9-ca36-4897-a589-9b6560e91a18",
   "metadata": {
    "tags": []
   },
   "source": [
    "## 一、算术运算和数据对齐"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ef64e022-dfef-497a-bab7-218c1d3b6352",
   "metadata": {},
   "source": [
    "### 1.1 Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8da1ff99-dd6f-4ca6-8f75-5c6423e2f9c2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-27T08:51:19.368870Z",
     "iopub.status.busy": "2022-04-27T08:51:19.368466Z",
     "iopub.status.idle": "2022-04-27T08:51:19.376958Z",
     "shell.execute_reply": "2022-04-27T08:51:19.375902Z",
     "shell.execute_reply.started": "2022-04-27T08:51:19.368842Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    1\n",
      "c    2\n",
      "d    3\n",
      "e    4\n",
      "dtype: int64\n",
      "a    0\n",
      "c    1\n",
      "e    2\n",
      "f    3\n",
      "g    4\n",
      "h    5\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 创建Series对象\n",
    "s1 = pd.Series(np.arange(5), index=[\"a\", \"b\", \"c\", \"d\", \"e\"])\n",
    "s2 = pd.Series(np.arange(6), index=[\"a\", \"c\", \"e\", \"f\", \"g\", \"h\"])\n",
    "\n",
    "print(s1)\n",
    "print(s2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d0872b7c-5dcb-4c48-b49c-0820432026af",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-27T08:53:24.503531Z",
     "iopub.status.busy": "2022-04-27T08:53:24.503290Z",
     "iopub.status.idle": "2022-04-27T08:53:24.521058Z",
     "shell.execute_reply": "2022-04-27T08:53:24.520459Z",
     "shell.execute_reply.started": "2022-04-27T08:53:24.503507Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.0\n",
       "b    NaN\n",
       "c    3.0\n",
       "d    NaN\n",
       "e    6.0\n",
       "f    NaN\n",
       "g    NaN\n",
       "h    NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 算术加\n",
    "s1 + s2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d282b69c-35b5-4298-8967-717cfa4378ac",
   "metadata": {},
   "source": [
    "### 1.2 DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "daf71148-c845-4fbb-a3ed-a79f000ff0d1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-27T12:50:26.524049Z",
     "iopub.status.busy": "2022-04-27T12:50:26.523737Z",
     "iopub.status.idle": "2022-04-27T12:50:26.537500Z",
     "shell.execute_reply": "2022-04-27T12:50:26.536484Z",
     "shell.execute_reply.started": "2022-04-27T12:50:26.524000Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A   B   C\n",
      "a  0   1   2\n",
      "b  3   4   5\n",
      "c  6   7   8\n",
      "d  9  10  11\n",
      "   A  B  H\n",
      "a  0  1  2\n",
      "c  3  4  5\n",
      "e  6  7  8\n"
     ]
    }
   ],
   "source": [
    "# 创建DataFrame对象\n",
    "df1 = pd.DataFrame(\n",
    "    np.arange(12).reshape(4, 3), index=[\"a\", \"b\", \"c\", \"d\"], columns=[\"A\", \"B\", \"C\"]\n",
    ")\n",
    "df2 = pd.DataFrame(\n",
    "    np.arange(9).reshape(3, 3), index=[\"a\", \"c\", \"e\"], columns=[\"A\", \"B\", \"H\"]\n",
    ")\n",
    "\n",
    "print(df1)\n",
    "print(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5025bd5e-62dc-45f4-9b84-4952ae462105",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-27T13:02:54.646449Z",
     "iopub.status.busy": "2022-04-27T13:02:54.646198Z",
     "iopub.status.idle": "2022-04-27T13:02:54.655677Z",
     "shell.execute_reply": "2022-04-27T13:02:54.654776Z",
     "shell.execute_reply.started": "2022-04-27T13:02:54.646423Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     A     B   C   H\n",
      "a  0.0   2.0 NaN NaN\n",
      "b  NaN   NaN NaN NaN\n",
      "c  9.0  11.0 NaN NaN\n",
      "d  NaN   NaN NaN NaN\n",
      "e  NaN   NaN NaN NaN\n"
     ]
    }
   ],
   "source": [
    "# 算术加\n",
    "print(df1 + df2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b74756eb-75f3-4710-bc8e-2353e9bce733",
   "metadata": {},
   "source": [
    "## 二、使用填充值的算术方法"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d9a12f6b-27c6-4e6c-bd33-8c17425db8d3",
   "metadata": {},
   "source": [
    "### 2.1 Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e323667a-473a-4860-8a53-89ed438de0cb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-27T14:28:36.407280Z",
     "iopub.status.busy": "2022-04-27T14:28:36.406956Z",
     "iopub.status.idle": "2022-04-27T14:28:36.413751Z",
     "shell.execute_reply": "2022-04-27T14:28:36.412969Z",
     "shell.execute_reply.started": "2022-04-27T14:28:36.407253Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.0\n",
       "b    NaN\n",
       "c    3.0\n",
       "d    NaN\n",
       "e    6.0\n",
       "f    NaN\n",
       "g    NaN\n",
       "h    NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用填充值的加法\n",
    "# s1+s2换成s1.add(s2)\n",
    "s1.add(s2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "42b6211a-7bb2-4811-a60b-37b2bae49b8b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-27T14:36:53.564612Z",
     "iopub.status.busy": "2022-04-27T14:36:53.564370Z",
     "iopub.status.idle": "2022-04-27T14:36:53.572241Z",
     "shell.execute_reply": "2022-04-27T14:36:53.570994Z",
     "shell.execute_reply.started": "2022-04-27T14:36:53.564587Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.0\n",
       "b    1.0\n",
       "c    3.0\n",
       "d    3.0\n",
       "e    6.0\n",
       "f    3.0\n",
       "g    4.0\n",
       "h    5.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用填充值的加法\n",
    "# s1+s2换成s1.add(s2)\n",
    "s1.add(s2, fill_value=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5a36c3f9-effe-4da7-ab73-c561a0dff869",
   "metadata": {},
   "source": [
    "### 2.2 DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "2058beda-5e30-4949-8968-01bbaa05e1c7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-27T17:59:27.769649Z",
     "iopub.status.busy": "2022-04-27T17:59:27.769401Z",
     "iopub.status.idle": "2022-04-27T17:59:27.779205Z",
     "shell.execute_reply": "2022-04-27T17:59:27.778426Z",
     "shell.execute_reply.started": "2022-04-27T17:59:27.769626Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     A     B     C    H\n",
      "a  0.0   2.0   2.0  2.0\n",
      "b  3.0   4.0   5.0  NaN\n",
      "c  9.0  11.0   8.0  5.0\n",
      "d  9.0  10.0  11.0  NaN\n",
      "e  6.0   7.0   NaN  8.0\n"
     ]
    }
   ],
   "source": [
    "# 使用填充值的加法\n",
    "# df1+df2换成df1.add(df2)\n",
    "print(df1.add(df2, fill_value=0))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "11b38f57-ae2c-44c3-93c7-2bcaac377407",
   "metadata": {},
   "source": [
    "### 2.3 reindex "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "61b2950e-975a-4828-b251-910338a1e29f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-27T18:32:32.095311Z",
     "iopub.status.busy": "2022-04-27T18:32:32.094983Z",
     "iopub.status.idle": "2022-04-27T18:32:32.101861Z",
     "shell.execute_reply": "2022-04-27T18:32:32.101220Z",
     "shell.execute_reply.started": "2022-04-27T18:32:32.095286Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A   B   C   D\n",
      "a  0   1   2 NaN\n",
      "b  3   4   5 NaN\n",
      "c  6   7   8 NaN\n",
      "d  9  10  11 NaN\n"
     ]
    }
   ],
   "source": [
    "# reindex未使用填充值\n",
    "print(df1.reindex(columns=[\"A\", \"B\", \"C\", \"D\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "27e4813f-2348-4880-9f3f-98a1b4d3e8d9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-27T18:33:50.989302Z",
     "iopub.status.busy": "2022-04-27T18:33:50.988684Z",
     "iopub.status.idle": "2022-04-27T18:33:50.997715Z",
     "shell.execute_reply": "2022-04-27T18:33:50.996497Z",
     "shell.execute_reply.started": "2022-04-27T18:33:50.989255Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A   B   C   D\n",
      "a  0   1   2  99\n",
      "b  3   4   5  99\n",
      "c  6   7   8  99\n",
      "d  9  10  11  99\n"
     ]
    }
   ],
   "source": [
    "# reindex未使用填充值\n",
    "print(df1.reindex(columns=[\"A\", \"B\", \"C\", \"D\"], fill_value=99))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a587e40b-fa52-4d82-8e06-dfbd5c68ca45",
   "metadata": {},
   "source": [
    "## 三、DataFrame和Series的混合运算"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55be8eb7-11c5-4348-9a3c-4cc7f1ae94bf",
   "metadata": {},
   "source": [
    "### 启发小例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "648e866b-9a12-4bcc-ab1d-f837caf7133c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-28T01:37:22.777962Z",
     "iopub.status.busy": "2022-04-28T01:37:22.777712Z",
     "iopub.status.idle": "2022-04-28T01:37:22.782919Z",
     "shell.execute_reply": "2022-04-28T01:37:22.781794Z",
     "shell.execute_reply.started": "2022-04-28T01:37:22.777935Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0  1  2]\n",
      " [ 3  4  5]\n",
      " [ 6  7  8]\n",
      " [ 9 10 11]]\n"
     ]
    }
   ],
   "source": [
    "# 创建二维NumPy数组\n",
    "arr = np.arange(12).reshape(4, 3)\n",
    "print(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "a5e43317-1f86-4cb0-84c8-1553837979cc",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-28T01:37:26.138049Z",
     "iopub.status.busy": "2022-04-28T01:37:26.137797Z",
     "iopub.status.idle": "2022-04-28T01:37:26.142215Z",
     "shell.execute_reply": "2022-04-28T01:37:26.141407Z",
     "shell.execute_reply.started": "2022-04-28T01:37:26.138023Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 1 2]\n"
     ]
    }
   ],
   "source": [
    "# 取出arr中的第0行，作为一维数组\n",
    "arr1 = arr[0]\n",
    "print(arr1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "67dd18c8-08fe-4ffb-8494-f616cfe04ace",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-28T01:37:28.714047Z",
     "iopub.status.busy": "2022-04-28T01:37:28.713776Z",
     "iopub.status.idle": "2022-04-28T01:37:28.718605Z",
     "shell.execute_reply": "2022-04-28T01:37:28.717782Z",
     "shell.execute_reply.started": "2022-04-28T01:37:28.714022Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0 0 0]\n",
      " [3 3 3]\n",
      " [6 6 6]\n",
      " [9 9 9]]\n"
     ]
    }
   ],
   "source": [
    "# 数组运算，arr减去arr1\n",
    "print(arr - arr1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "0644cc14-0de7-4b75-9c2f-31c7aae4bdb2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-28T01:30:50.223384Z",
     "iopub.status.busy": "2022-04-28T01:30:50.223078Z",
     "iopub.status.idle": "2022-04-28T01:30:50.235701Z",
     "shell.execute_reply": "2022-04-28T01:30:50.235182Z",
     "shell.execute_reply.started": "2022-04-28T01:30:50.223346Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "A    0\n",
      "B    1\n",
      "C    2\n",
      "Name: a, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 取出df1中的第0行，作为一个Series\n",
    "s3 = df1.iloc[0]\n",
    "print(s3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "fcc58820-2827-4a92-bbac-588e8b37e140",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-28T01:34:19.495170Z",
     "iopub.status.busy": "2022-04-28T01:34:19.494881Z",
     "iopub.status.idle": "2022-04-28T01:34:19.508485Z",
     "shell.execute_reply": "2022-04-28T01:34:19.507825Z",
     "shell.execute_reply.started": "2022-04-28T01:34:19.495134Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A  B  C\n",
      "a  0  0  0\n",
      "b  3  3  3\n",
      "c  6  6  6\n",
      "d  9  9  9\n"
     ]
    }
   ],
   "source": [
    "# pd混合运算，df1-s3\n",
    "print(df1 - s3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "97547504-b5ae-48b3-947c-23cfe12b13a2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-28T01:51:10.825149Z",
     "iopub.status.busy": "2022-04-28T01:51:10.824815Z",
     "iopub.status.idle": "2022-04-28T01:51:10.829812Z",
     "shell.execute_reply": "2022-04-28T01:51:10.829088Z",
     "shell.execute_reply.started": "2022-04-28T01:51:10.825121Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0\n",
      "b    3\n",
      "c    6\n",
      "d    9\n",
      "Name: A, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 取出df1中的第0列，作为一个Series\n",
    "s4 = df1[\"A\"]\n",
    "print(s4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "39569f23-6525-4880-9957-7608a63b21ef",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-04-28T01:58:54.348845Z",
     "iopub.status.busy": "2022-04-28T01:58:54.348503Z",
     "iopub.status.idle": "2022-04-28T01:58:54.354255Z",
     "shell.execute_reply": "2022-04-28T01:58:54.353505Z",
     "shell.execute_reply.started": "2022-04-28T01:58:54.348820Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A  B  C\n",
      "a  0  1  2\n",
      "b  0  1  2\n",
      "c  0  1  2\n",
      "d  0  1  2\n"
     ]
    }
   ],
   "source": [
    "# pd混合运算，df1减去s4，按行索引广播\n",
    "# print(df1.sub(s4, axis=0))\n",
    "print(df1.sub(s4, axis=\"index\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
